* Session settings: no --more-- paging, pause requests ignored, plain-text logs
set more off 
pause off
set logtype text
* NOTE(review): the memory setting is presumably a no-op on the Stata releases
* that provide import excel (used below) -- confirm before removing
set mem 500M

*************** DESCRIPTION *************************************
* Loads BEA Multinational Enterprise data for US business presence
* abroad. Data available annually. 
*
*	Inputs: - LevelKey_granular_MNE.xls: consolidates segmentation
*			  over time and maps to BEA
*			- BEA_MNE datasets
*	Outputs: 2.intermediate/BEA_mne: Consolidated MNE dataset, 
*			 merged with main analysis files at MNE hierarchy. 
*
* IMPORTANT NOTE: We map the available data to NAICS-based BEA segments 
* as closely as possible, but the mapping is IMPERFECT before 1999
* because the MNE data then follow an SIC-based segmentation. As a result,  
* industry-level investment is studied only after 1999, while 
* aggregates are studied over the entire period. 
*****************************************************************


** LOAD THE LEVEL KEYS (GRANULARITY FOR DATA LOADING AND INDUSTRY NAMES)
* Each sheet of the level-key workbook is reduced to the rows flagged
* keep_ind == 1 and cached in 6.Temp for the merges further down.
* Use LevelKey_test.xls to validate manual testing

* Key used for the 1999-and-later tables
import excel using 1.user_inputs\LevelKey_granular_MNE.xls, sheet("MNE_post99_new") firstrow clear
keep if keep_ind == 1
save 6.Temp\levelkey_mne_post99.dta, replace

* Key used for the 1998-and-earlier tables; raw_name and NAICS are not carried along
import excel using 1.user_inputs\LevelKey_granular_MNE.xls, sheet("MNE_pre98_new") firstrow clear
keep if keep_ind == 1
drop raw_name NAICS
save 6.Temp\levelkey_mne_pre98.dta, replace


***


/* ------------------*/
/* 		MOFA DATA    */
/* ------------------*/


* 2009-2014: affiliate (MOFA) financials, one worksheet per year.
* Every year is mapped to the post-1999 level key and stacked into temp0914.
forvalues ii = 2009/2014 {

	display `ii'
	*local ii = 2014
	import excel 0.raw_inputs/BEA_MNE/US_abroad/selected_mofas_indy.xls, sheet(`ii') cellrange(A5:N283) firstrow clear

	* Industry label and count come from the columns imported as A and B
	rename A ind
	rename B a1a_n

	* Financial items are picked up by their header names
	rename Totalassets a1a_at
	rename Sales a1a_sale
	rename Netincome a1a_ni
	rename Valueadded a1a_va
	rename Researchanddev a1a_rd
	rename Compensationofemployees a1a_comp

	* Capex is taken from this workbook for 2014 only
	if `ii' == 2014 {
		rename Capitalexp a1a_capx
	}

	keep ind a1a*
	drop if ind == ""

	* Attach the level key and retain the flagged industries
	merge m:1 ind using 6.Temp\levelkey_mne_post99
	keep if keep_ind == 1
	drop _merge

	generate year = `ii'
	tostring a1a_n, replace

	* From the second year on, stack onto what has been saved so far
	if `ii' > 2009 {
		compress
		append using temp0914
	}

	save temp0914, replace
	sleep 600
}


**


* 2009-2013: affiliate capital expenditures, one CAPX workbook per year,
* stacked into temp_capx0913 and then merged into temp0914.
forvalues ii = 2009/2013 {

	display "`ii'"
	import excel 0.raw_inputs/BEA_MNE/US_abroad/`ii'/CAPX.xls, sheet("II.C 5") cellrange(A8:B100) firstrow clear

	rename A ind
	rename B a1a_capx
	drop if ind == ""

	* Attach the level key and retain the flagged industries
	merge m:1 ind using 6.Temp\levelkey_mne_post99
	keep if keep_ind == 1
	drop _merge

	generate year = `ii'

	* From the second year on, stack onto what has been saved so far
	if `ii' > 2009 {
		compress
		append using temp_capx0913
	}

	save temp_capx0913, replace
	sleep 600
}

* Combine the capex series with the 2009-2014 financials by industry and year,
* convert every column to string, and overwrite temp0914 with the result
merge 1:1 ind year using temp0914
drop _merge
keep ind ind_short year a1a*
tostring *, replace
save temp0914, replace
erase temp_capx0913.dta



***


* 1999-2008: affiliate financials from the yearly Tab3A2 workbooks.
* Parts a, b and c are read for every year; part d is read for 2008 only.
* All parts are mapped to the post-1999 level key and stacked into temp9908.
forvalues ii = 1999/2008 {
	foreach jj in a b c d {

		* Skip part d for every year before 2008
		if "`jj'" == "d" & `ii' < 2008 {
			continue
		}

		display "`ii'`jj'"

		*local ii = 2003
		*local jj = "c"
		import excel 0.raw_inputs/BEA_MNE/US_abroad/`ii'/Tab3A2`jj'.xls, cellrange(A5) firstrow clear

		rename A ind
		* 1999 and 2004 onward: column C becomes a1a_n.
		* 2000-2003: column B becomes a1_naff_mne, which the a1a_ keep below discards.
		if `ii' == 1999 | `ii' >= 2004 {
			rename C a1a_n
		}
		else {
			rename B a1_naff_mne
		}

		* VA named gross product pre01
		local vaname Valueadded
		if `ii' < 2001 local vaname Grossproduct
		rename `vaname' a1a_va

		* CAPX not available in 1999 and 2004
		if `ii' != 1999 & `ii' != 2004 {
			rename Capitalexp a1a_capx
		}

		rename Totalassets a1a_at
		rename Sales a1a_sale
		rename Netincome a1a_ni
		*rename Researchanddev a1a_rd
		rename Compensationofemployees a1a_comp

		* The level key holds one label column per year (ind1999, ind2000, ...)
		keep ind a1a_*
		rename ind ind`ii'

		merge m:1 ind`ii' using 6.Temp\levelkey_mne_post99
		keep if _merge == 3
		drop if ind`ii' == "" | ind`ii' == " "
		drop _merge
		keep if keep_ind == 1

		keep ind`ii' ind_short a1a*
		rename ind`ii' ind

		generate year = `ii'
		tostring *, replace

		* Everything after the very first file is stacked onto the saved data
		if "`ii'`jj'" != "1999a" {
			compress
			append using temp9908
		}

		save temp9908, replace
		sleep 600
	}
}



***

* CAPX for 1999 and 2004, read from the separate CAPX workbooks,
* stacked into temp_capx0004 and then merged into temp9908.
foreach ii in 1999 2004 {

	display "`ii'"
	import excel 0.raw_inputs/BEA_MNE/US_abroad/`ii'/CAPX.xls, cellrange(A7:B100) firstrow clear

	rename A ind
	rename B a1a_capx

	* The level key holds one label column per year, so match on the year-specific name
	keep ind a1a_*
	rename ind ind`ii'

	merge m:1 ind`ii' using 6.Temp\levelkey_mne_post99
	keep if _merge == 3
	drop if ind`ii' == "" | ind`ii' == " "
	drop _merge
	keep if keep_ind == 1

	keep ind`ii' ind_short a1a*
	rename ind`ii' ind

	generate year = `ii'
	tostring *, replace

	* The second year in the list is stacked onto the first
	if `ii' == 2004 {
		compress
		append using temp_capx0004
	}

	save temp_capx0004, replace
	sleep 600
}

* Combine with the 1999-2008 financials by industry and year and overwrite temp9908
merge 1:1 ind year using temp9908
drop _merge
keep ind ind_short year a1a*
save temp9908, replace
erase temp_capx0004.dta


***


* 1998: affiliate financials from the two Tab3A2 workbooks (parts a and b).
* Both parts are converted to string and stacked before columns are named.
import excel 0.raw_inputs\BEA_MNE\US_abroad\1998\Tab3A2a.xls, cellrange(A14:N110) firstrow clear
tostring *, replace
save temp, replace

import excel 0.raw_inputs\BEA_MNE\US_abroad\1998\Tab3A2b.xls, cellrange(A14:N109) firstrow clear
tostring *, replace
append using temp
erase temp.dta

* Columns are identified by letter; D is not used
rename A ind
rename B a1a_n
rename C a1a_at
rename E a1a_capx
rename F a1a_sale
rename G a1a_ni
rename H a1a_va
rename I a1a_comp

keep ind a1a_*
drop if ind == ""

* Remove dots and surrounding blanks from the label, then name it as in the key
replace ind = strtrim(subinstr(ind, ".", "", .))
rename ind ind98

* Map to the pre-1998 level key and retain the flagged industries
merge m:1 ind98 using 6.Temp\levelkey_mne_pre98
keep if keep_ind == 1
drop _merge

generate year = 1998
keep ind_short year a1*
order ind_short year a1*

tostring *, replace
save temp98, replace


***



* 1995-1997: affiliate financials from the 3a2_YY.prn text tables.
* The code works from the single imported column v1: its leading characters
* hold the industry label and the remainder holds the data columns.
* Each year is mapped to the pre-1998 level key and stacked into temp9597.
forvalues ii = 95(1)97{
*local ii = 95
import delimited "0.raw_inputs\BEA_MNE\US_abroad\1993-1997\3a2_`ii'.prn", delim(tab) clear rowrange(18)

* The label/data boundary is at character 68 for 1995 and 72 for 1996-97
if `ii' == 95 {
g ind = substr(v1,1,68)
g data = substr(v1,69,.)
}
else {
 g ind = substr(v1,1,72)
 g data = substr(v1,73,.)
}

* Reduce runs of blanks to single blanks so split creates data1, data2, ...
replace data = strtrim(data)
replace data = subinstr(data, "     ", " ",.)
replace data = subinstr(data, "    ", " ",.)
replace data = subinstr(data, "   ", " ",.)
replace data = subinstr(data, "  ", " ",.)
replace data = subinstr(data, "  ", " ",.)
split data, parse(" ") 

* Remove dots and surrounding blanks from the label, then give it the
* year-specific name used as the merge key (ind95, ind96, ind97)
replace ind = subinstr(ind, ".", "",.)
replace ind = strtrim(ind)
rename ind ind`ii'

* Data columns by position; data3 is not used
rename data1 a1a_n
rename data2 a1a_at
rename data4 a1a_capx
rename data5 a1a_sale
rename data6 a1a_ni 
rename data7 a1a_va 
rename data8 a1a_comp

* confirm all using mapped
merge m:1 ind`ii' using 6.Temp\levelkey_mne_pre98
drop _merge 
keep if keep_ind == 1

* The label was renamed to ind95/ind96/ind97 above, so a bare "ind" no longer
* names the label here. Keep only ind_short and the data, as the 1998 block
* and the parent 95-97 block do; ind_short is the only identifier used when
* the periods are later collapsed.
keep ind_short a1a_*
g year = 1900 + `ii'
tostring *, replace

* From the second year on, stack onto what has been saved so far
if `ii' ~= 95 {
compress
append using temp9597 
}

save temp9597, replace
sleep 600

}







****








/* --------------------	*/
/* 		PARENT DATA     */
/* --------------------	*/




* 2009-2014: US parent financials, one worksheet per year.
* Every year is mapped to the post-1999 level key and stacked into temp0914_par.
forvalues ii = 2009/2014 {

	display `ii'
	*local ii = 2014
	import excel 0.raw_inputs/BEA_MNE/US_abroad/US_parent_indy.xls, sheet(`ii') cellrange(A4:K97) firstrow clear

	* Industry label comes from the column imported as A
	rename A ind

	* Financial items are picked up by their header names
	rename Totalassets a1p_at
	rename Sales a1p_sale
	rename Netincome a1p_ni
	rename Valueadded a1p_va
	rename Researchanddev a1p_rd
	rename Compensationofemployees a1p_comp
	rename Capitalexp a1p_capx

	keep ind a1p*
	drop if ind == ""

	* Attach the level key and retain the flagged industries
	merge m:1 ind using 6.Temp\levelkey_mne_post99
	keep if keep_ind == 1
	drop _merge
	*drop ind

	generate year = `ii'
	tostring *, replace

	* From the second year on, stack onto what has been saved so far
	if `ii' > 2009 {
		compress
		append using temp0914_par
	}

	save temp0914_par, replace
	sleep 600
}



***



* 1999-2008: US parent financials from the yearly Tab2M1 workbooks.
* Every year is mapped to the post-1999 level key and stacked into temp9908_par.
forvalues ii = 1999(1)2008{

	* Progress marker; the string is closed with a matching double quote,
	* as in the other loops of this file
	di "`ii'"
	*local ii = 1999
	import excel 0.raw_inputs/BEA_MNE/US_abroad/`ii'/Tab2M1.xls, cellrange(A4) firstrow clear

	* Industry label comes from the column imported as A; the financial
	* items are picked up by their header names
	rename A ind
	rename Totalassets a1p_at
	rename Sales a1p_sale
	rename Netincome a1p_ni
	rename Compensationofemployees a1p_comp
	rename Capitalexp a1p_capx

	*VA named gross product pre01
	if `ii' < 2001 {
		rename Grossproduct a1p_va
	}
	else{
		rename Valueadded a1p_va
	}

	* The level key holds one label column per year, so match on the
	* year-specific name and keep matched, non-blank, flagged industries
	keep ind a1p_*
	rename ind ind`ii'

	merge m:1 ind`ii' using 6.Temp\levelkey_mne_post99
	keep if _merge == 3
	drop if ind`ii' == "" | ind`ii' == " "
	drop _merge 
	keep if keep_ind == 1

	keep ind`ii' ind_short a1p*
	rename ind`ii' ind

	g year = `ii'
	tostring *, replace 

	* From the second year on, stack onto what has been saved so far
	if `ii' ~= 1999 {
	compress
	append using temp9908_par
	}

	save temp9908_par, replace
	sleep 600
}



* 1998: US parent financials from Tab2L1, saved as temp98_par.
import excel 0.raw_inputs/BEA_MNE/US_abroad/1998/Tab2L1.xls, cellrange(A13) firstrow clear

* Columns are identified by letter; C, D and I are not used
rename A ind
rename B a1p_at
rename E a1p_capx
rename F a1p_sale
rename G a1p_ni
rename H a1p_va
rename J a1p_comp

keep ind a1p_*
drop if ind == ""

* Remove dots and surrounding blanks from the label, then name it as in the key
replace ind = strtrim(subinstr(ind, ".", "", .))
rename ind ind98_p

* Map to the pre-1998 level key and retain the flagged industries
merge m:1 ind98_p using 6.Temp\levelkey_mne_pre98
keep if keep_ind == 1
drop _merge

generate year = 1998
keep ind_short year a1*
order ind_short year a1*

tostring *, replace
save temp98_par, replace



***



* 1995-1997: US parent financials from the 2L1_YY.prn text tables.
* The code works from the single imported column v1: its leading characters
* hold the industry label and the remainder holds the data columns.
* Each year is mapped to the pre-1998 level key and stacked into temp9597_par.
forvalues ii = 95/97 {

	*local ii = 97
	import delimited "0.raw_inputs\BEA_MNE\US_abroad\1993-1997\2L1_`ii'.prn", delim(tab) clear rowrange(15)

	* Width of the label field: 68 characters for 1995, 72 for 1996-97
	local cut 72
	if `ii' == 95 local cut 68
	generate ind = substr(v1, 1, `cut')
	generate data = substr(v1, `cut' + 1, .)

	* Reduce runs of blanks to single blanks so split creates data1, data2, ...
	replace data = strtrim(data)
	replace data = subinstr(data, "     ", " ",.)
	replace data = subinstr(data, "    ", " ",.)
	replace data = subinstr(data, "   ", " ",.)
	replace data = subinstr(data, "  ", " ",.)
	replace data = subinstr(data, "  ", " ",.)
	split data, parse(" ")

	* Remove dots and surrounding blanks from the label, then give it the
	* year-specific name used as the merge key (ind95_p, ind96_p, ind97_p)
	replace ind = strtrim(subinstr(ind, ".", "", .))
	rename ind ind`ii'_p

	* Data columns by position; data2, data3 and data8 are not used
	rename data1 a1p_at
	rename data4 a1p_capx
	rename data5 a1p_sale
	rename data6 a1p_ni
	rename data7 a1p_va
	rename data9 a1p_comp

	* Map to the pre-1998 level key and retain the flagged industries
	merge m:1 ind`ii'_p using 6.Temp\levelkey_mne_pre98
	keep if keep_ind == 1
	drop _merge

	keep ind_short a1p_*
	generate year = 1900 + `ii'
	tostring *, replace

	* From the second year on, stack onto what has been saved so far
	if `ii' > 95 {
		compress
		append using temp9597_par
	}

	save temp9597_par, replace
	sleep 600
}

* Both level keys have served their purpose; remove the cached copies
erase 6.Temp\levelkey_mne_pre98.dta
erase 6.Temp\levelkey_mne_post99.dta


***



/* ---------------------------- */
/* 		CONSOLIDATE AND OUTPUT 	*/
/* ---------------------------- */

* Affiliate side: stack all periods, convert the a1 columns to numeric
* (commas ignored, non-numeric entries forced to missing) and sum within
* each ind_short-year cell
use temp0914, clear
foreach part in temp9908 temp98 temp9597 {
	append using `part'
}
destring a1*, replace ignore(",") force
collapse (sum) a1a_*, by(ind_short year)
save tempaff, replace

* Parent side: same treatment
use temp0914_par, clear
foreach part in temp9908_par temp98_par temp9597_par {
	append using `part'
}
destring a1p*, replace ignore(",") force
collapse (sum) a1p_*, by(ind_short year)

* Put parent and affiliate series side by side
merge 1:1 ind_short year using tempaff
drop _merge
erase tempaff.dta

* Zeros in every column other than the identifiers are recoded to missing
ds ind_short year, not
local valuevars `r(varlist)'
foreach v of local valuevars {
	replace `v' = . if `v' == 0
}

order ind_short year a1*
sort ind_short year
compress
destring year, replace
save 2.intermediate/BEA_mne_new, replace

* Prepend the 1990-94 figures (total industries only) to the consolidated file
import excel 0.raw_inputs\BEA_MNE\US_abroad\9094_total.xlsx, firstrow clear
rename ind ind_short
drop *liab *eq *rd *nemp
append using 2.intermediate/BEA_mne_new

* Rows without an industry code are dropped; the pre-1999 restriction to
* "All" is left disabled
drop if ind_short == ""
*drop if year < 1999 & ind_short ~= "All"
sort ind_short year

save 2.intermediate/BEA_mne, replace

* Remove the per-period working files
foreach f in temp0914 temp9908 temp98 temp9597 temp0914_par temp9908_par temp98_par temp9597_par {
	erase `f'.dta
}

* Spot checks against manually replicated totals.
* Yearly totals are summed over every industry except "All"; each test is the
* relative gap between that total and a hard-coded benchmark, evaluated on the
* "Min_Other" row of the benchmark year.
use 2.intermediate/BEA_mne, clear
egen a1a_ntot    = total(a1a_n)    if ind_short != "All", by(year)
egen a1p_capxtot = total(a1p_capx) if ind_short != "All", by(year)
egen a1a_capxtot = total(a1a_capx) if ind_short != "All", by(year)

* Benchmarks from the 1999-2008 tables
generate test6 = (a1a_ntot - 24168) / a1a_ntot if year == 2006 & ind_short == "Min_Other"
generate test7 = (a1a_capxtot - 155078) / a1a_capxtot if year == 2006 & ind_short == "Min_Other"
generate test8 = (a1p_capxtot - 445295) / a1p_capxtot if year == 2006 & ind_short == "Min_Other"

* Benchmarks from the 2009-and-later tables
generate test9 = (a1a_ntot - 32763) / a1a_ntot if year == 2014 & ind_short == "Min_Other"
generate test10 = (a1p_capxtot - 713545) / a1p_capxtot if year == 2014 & ind_short == "Min_Other"
generate test11 = (a1p_capxtot - 431796) / a1p_capxtot if year == 2009 & ind_short == "Min_Other"

summarize test*
drop test*
pause



